import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.stats import spearmanr

# Load the deviation summary table.
df = pd.read_excel("Media_6Metrics_DeviationSummary_new.xlsx")

# The six deviation dimensions that feed both composite scores.
metrics = [
    "SigmoidDeviation_BalanceIndex",
    "SigmoidDeviation_ImpactIndex",
    "SigmoidDeviation_ToneIndex",
    "SigmoidDeviation_EventCount",
    "JS_Quad",
    "JS_Event",
]

# Work on an independent copy of just the metric columns.
X = df.loc[:, metrics].copy()

# Standardize the metrics before fitting the PCA.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# Fit a single-component PCA on the standardized metrics; pc1 holds the
# projection of each row onto that component.
pca = PCA(n_components=1)
pc1 = pca.fit_transform(X_std)

# PCA weights: take the absolute value of the first component's loadings and
# normalize them to sum to 1, which makes them easier to interpret.
raw_weights = np.absolute(pca.components_[0, :])
pca_weights = raw_weights / np.sum(raw_weights)

pca_weights_df = pd.DataFrame(
    {
        "Metric": metrics,
        "PCA_Weight": pca_weights,
    }
)

print(pca_weights_df)

# PCA-weighted composite score.
# NOTE(review): the weights come from a PCA fitted on the standardized data
# (X_std) but are applied to the raw metric values (X) here - confirm that
# this is intended.
df["Deviation_PCA"] = np.dot(X.to_numpy(), pca_weights)

# Equal-weight composite score: plain row-wise mean of the metrics.
df["Deviation_Equal"] = X.mean(axis="columns")

# Spearman rank correlation between the two composite scores.
rho, pval = spearmanr(df["Deviation_Equal"], df["Deviation_PCA"])
print(f"Spearman rho = {rho:.3f}, p = {pval:.4f}")
